################################################################################
# PACKAGE AND DATA LOADING
################################################################################
pacman::p_load(stm, fixest, modelsummary, ggplot2)

df <- readRDS('stm_master_df.rds')
kResult <- readRDS('stm_kResult.rds')

##############################
### Structural Topic Model ###
##############################
set.seed(25876)
processed <- textProcessor(df$text, metadata = df,language="english",
                           removestopwords=TRUE,removenumbers=TRUE,removepunctuation=TRUE,
                           lowercase=TRUE,ucp=TRUE,verbose=TRUE)
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <- out$meta

plotRemoved(processed$documents, lower.thresh = seq(1, 200, by = 5))

fit15k <- stm(documents = out$documents, vocab = out$vocab, K = 15, prevalence =~ as.factor(year)+as.factor(month)+
                ccode305+ccode355+ccode344+ccode352+ccode366+ccode375+ccode350+ccode350+ccode310+
                ccode367+ccode368+ccode290+ccode360+ccode317+ccode349+ccode230+ccode200, max.em.its = 500, data = out$meta, init.type = "Spectral")

################################################################################
# FIGURES
################################################################################

# Topic labels
topics_labels  <- c("EU cooperation:","Joint operations:","Gen. cooperation:",
                    "Smuggling:","Trafficking:","Training:","Greek coast:",
                    "Recruitment:","Rescue:","Illegal crossings:","R & D:",
                    "Ukrainians:","Illegal migration:","Human rights:",
                    "Int. cooperation:")
topics_labels1 <- gsub(":", "", topics_labels)

exclus <- unlist(kResult$results$exclus)
semcoh <- unlist(kResult$results$semcoh)
df_ev  <- data.frame(k = 2:20, exclus = exclus, semcoh = semcoh)

png('Figure A.5_1.png',
    3000, 3000, res = 650)
ggplot(df_ev, aes(x = k, y = exclus, group = 1)) +
  geom_line() + geom_point() +
  ylab("Exclusivity") + xlab("Number of Topics") +
  ggtitle("Exclusivity") + theme_light()
dev.off()

png('Figure A.5_2.png',
    3000, 3000, res = 650)
ggplot(df_ev, aes(x = k, y = semcoh, group = 1)) +
  geom_line() + geom_point() +
  ylab("Semantic Coherence") + xlab("Number of Topics") +
  ggtitle("Semantic Coherence") + theme_light()
dev.off()

# --- Figure A.6: STM topic summary ---
png('Figure A.6.png',
    3000, 3000, res = 325)
plot(fit15k, type = "summary", n = 10, xlim = c(0, 4), topic.names = topics_labels)
dev.off()

# --- Figure A.7: Topic correlations ---
cormat <- topicCorr(fit15k)
png('Figure A.7.png',
    3000, 3000, res = 575)
plot(cormat, vlabels = topics_labels1)
dev.off()

# --- Figure 5: Human rights vs security topic trends over time ---
df2 <- df[-out$docs.removed,]

df2$topic_humanrights <- fit15k$theta[,14]
df2$topic_crime <- fit15k$theta[,4] + fit15k$theta[,5] + fit15k$theta[,10] + fit15k$theta[,13]

df3 <- data.frame(
  year       = c(df2$year, df2$year),
  date       = c(df2$date, df2$date),
  med.sea    = c(df2$med.sea, df2$med.sea),
  topic      = c(rep("Human rights",    nrow(df2)),
                 rep("Security Issues", nrow(df2))),
  proportion = c(df2$topic_humanrights, df2$topic_crime)
)

png('Figure 5.png',
    4000, 3000, res = 600)
ggplot(df3, aes(x = date, y = proportion, group = topic)) +
  geom_smooth(aes(colour = topic), span = 0.3) +
  scale_y_continuous(labels = scales::percent) +
  ylab("Topic Proportion") + xlab("Year") +
  scale_color_manual(values = c('blue1', 'red3')) +
  theme_bw() +
  theme(legend.title = element_blank())
dev.off()

################################################################################
# SECTION 4: TABLES
################################################################################

cm <- c("wall"           = "Walls",
        "log(wall + 1)"  = "Walls (log)",
        "med.sea"        = "Mediterranean Border")

# --- Human rights topic models ---
hr1 <- feglm(topic_humanrights ~ wall           | year,         df2, cluster = "year", family = binomial(link = "logit"))
hr2 <- feglm(topic_humanrights ~ log(wall + 1)  | year,         df2, cluster = "year", family = binomial(link = "logit"))
hr3 <- feglm(topic_humanrights ~ wall           | year + month, df2, cluster = "year", family = binomial(link = "logit"))
hr5 <- feglm(topic_humanrights ~ log(wall + 1)  | year + month, df2, cluster = "year", family = binomial(link = "logit"))
hr4 <- feglm(topic_humanrights ~ wall           + med.sea | year + month, df2, cluster = "year", family = binomial(link = "logit"))
hr6 <- feglm(topic_humanrights ~ log(wall + 1)  + med.sea | year + month, df2, cluster = "year", family = binomial(link = "logit"))

# --- Security topic models ---
sec1 <- feglm(topic_crime ~ wall           | year,         df2, cluster = "year", family = binomial(link = "logit"))
sec2 <- feglm(topic_crime ~ log(wall + 1)  | year,         df2, cluster = "year", family = binomial(link = "logit"))
sec3 <- feglm(topic_crime ~ wall           | year + month, df2, cluster = "year", family = binomial(link = "logit"))
sec5 <- feglm(topic_crime ~ log(wall + 1)  | year + month, df2, cluster = "year", family = binomial(link = "logit"))
sec4 <- feglm(topic_crime ~ wall           + med.sea | year + month, df2, cluster = "year", family = binomial(link = "logit"))
sec6 <- feglm(topic_crime ~ log(wall + 1)  + med.sea | year + month, df2, cluster = "year", family = binomial(link = "logit"))

# --- Table 4 ---
models <- list("HR (1)"       = hr1,  "HR (2)"       = hr2,
               "HR (3)"       = hr3,  "HR (4)"       = hr5,
               "Security (1)" = sec1, "Security (2)" = sec2,
               "Security (3)" = sec3, "Security (5)" = sec5)
modelsummary(models, stars = TRUE, escape = FALSE,
             output = 'Table 4.html',
             coef_map = cm, fmt = 2, vcov = "robust",
             gof_omit = "Std.Errors|AIC|BIC|RMSE|R2.Within|R2W|R2W.Adj",
             notes = list("Robust standard errors reported."))

# --- Table A.17 ---
models <- list("HR (1)"       = hr4,  "HR (2)"       = hr6,
               "Security (1)" = sec4, "Security (2)" = sec6)
modelsummary(models, stars = TRUE, escape = FALSE,
             output = 'Table A.17.html',
             coef_map = cm, fmt = 2, vcov = "robust",
             gof_omit = "Std.Errors|AIC|BIC|RMSE|R2.Within|R2W|R2W.Adj",
             notes = list("Robust standard errors reported."))
